/* OPTIONS OBS=5000  NOREPLACE ; */   


/** This programs computes the within-industry interquartile ranges of TFPR for each industry-year on:

	1.  2002 and 2007 CART-completed for non-AR CMF plants

  INPUT FILES:    

    nberces.naics5809:  NBER-CES Productivity Database (industry-level aggregates and price indices). 
    bea.bea_naics:       3- or 4-digit NAICS industry level capital data from BEA. 
 
    allcmf.cmf2002_nonar.sas7bdat
    allcmf.cmf07asm06.sas7bdat

    allcmf.gooddata_all_inds02.sas7bdat
    allcmf.gooddata_all_inds07.sas7bdat

    allcmf.imputes_all_inds02
    allcmf.imputes_all_inds07

  OUTPUT FILES:


**/


/* Read in just the 2002 price indexes, so that we can rescale the original 
   indexes to a 2002 base year. */

data nberces2002 (keep = naics PISHIP_2002  PIMAT_2002 PIEN_2002 PIINV_2002);
 set nberces.naics5809;
    if year=2002;
    PISHIP_2002 = PISHIP;
    PIMAT_2002 = PIMAT;
    PIEN_2002 = PIEN;
    PIINV_2002 = PIINV;
run;

data nberces0207;
 set nberces.naics5809;
 if year in (2002,2007);
run;
 
proc sort data=nberces0207; by naics; run;
proc sort data=nberces2002; by naics; run;

data nberces0207 in0207_not_in02 in02_not_in07;
 merge nberces0207 (in=in0207) nberces2002 (in=in02);
 by naics;
 if in0207 and in02 then output nberces0207;
 else if in0207 then output in0207_not_in02;
 else output in02_not_in07;
run;

/* Read in the NBER-CES productivity database for industry prices indices, and to 
   construct industry cost shares and capital shares. */ 
/* Rescale the price indexes so that 2002=1.000 */

data nberces0207 (keep = naics_char year alpha_K alpha_L alpha_E alpha_M PISHIP_new PIMAT_new PIEN_new PIINV_new 
                          keqshare kstshare);
 set nberces0207;
  alpha_L = pay/vship;
  alpha_E = energy/vship;
  alpha_M = (matcost-energy)/vship;
  alpha_K = 1 - sum(alpha_L,alpha_E,alpha_M);  
  PISHIP_new = PISHIP/PISHIP_2002;
  PIMAT_new = PIMAT/PIMAT_2002;
  PIEN_new = PIEN/PIEN_2002;
  PIINV_new = PIINV/PIINV_2002;

  totcapital = equip + plant;
  keqshare = equip/totcapital;  /* Industry-level equipment capital's share */
  kstshare = plant/totcapital;  /* Industry-level structures capital's share */
  naics_char = put(NAICS,6.);

run;

proc datasets library = work nolist;
  modify nberces0207;
  rename naics_char= NAICS;
RUN;


/* Read in the 2002 and 2007 BEA capital stock dataset (for converting book values to market values of capital stocks).  
   NAICS_BEA are mostly 3-digit NAICS codes, but some are ranges of codes (e.g., 3361-3363).
   So I need to create a new "NAICS4_BEA" variable to be able to merge the BEA data to the CMF data. 
*/

data bea_capital (keep = NAICS4_BEA year gkheq gkhst nkceq nkcst);
 set bea.bea_naics;
 if year in (2002,2007);
 NAICS4 = substr(NAICS_BEA,1,4);
 NAICS7 = substr(NAICS_BEA,1,7);
 NAICS9 = substr(NAICS_BEA,1,9);
 if NAICS4 in ("321 " , "322 " , "323 " , "324 " , "325 " , "326 " , "327 " , "331 " , "332 " , "333 " , "334 " , "335 " , "337 " , "339 ") 
 then NAICS4_BEA = NAICS4;
 else if NAICS7 = "311-312" 
 then NAICS4_BEA = "311 ";
 else if NAICS7 = "313-314" 
 then NAICS4_BEA = "313 ";
 else if NAICS7 = "315-316" 
 then NAICS4_BEA = "315 ";
 else if NAICS9 = "3361-3363"
 then NAICS4_BEA = "3361";
 else if NAICS9 = "3364-3369"
 then NAICS4_BEA = "3364";
 if NAICS4_BEA ne .;
run;


%macro merge_industry_plant(dataset=);

	/* Create a new industry variable for merging to the BEA capital data. */

	data &dataset (keep = survu_id firmid NAICS_NEW_6 NAICS4_BEA tvs dinv cm energycmratio sw ph wwswratio tae year _IMPUTE_);
	 set &dataset;
	   NAICS3 = substr(NAICS_NEW_6,1,3);
	   NAICS4 = substr(NAICS_NEW_6,1,4);
	   if 	   NAICS3 = "321" then NAICS4_BEA = "321 ";
	   else if NAICS3 = "322" then NAICS4_BEA = "322 "; 
	   else if NAICS3 = "323" then NAICS4_BEA = "323 "; 
	   else if NAICS3 = "324" then NAICS4_BEA = "324 "; 
	   else if NAICS3 = "325" then NAICS4_BEA = "325 "; 
	   else if NAICS3 = "326" then NAICS4_BEA = "326 "; 
	   else if NAICS3 = "327" then NAICS4_BEA = "327 "; 
	   else if NAICS3 = "331" then NAICS4_BEA = "331 "; 
	   else if NAICS3 = "332" then NAICS4_BEA = "332 "; 
	   else if NAICS3 = "333" then NAICS4_BEA = "333 "; 
	   else if NAICS3 = "334" then NAICS4_BEA = "334 "; 
	   else if NAICS3 = "335" then NAICS4_BEA = "335 "; 
	   else if NAICS3 = "337" then NAICS4_BEA = "337 "; 
	   else if NAICS3 = "339" then NAICS4_BEA = "339 "; 
	   else if NAICS3 in ("311","312")
	   then NAICS4_BEA = "311 ";
	   else if NAICS3 in ("313","314")
	   then NAICS4_BEA = "313 ";
	   else if NAICS3 in ("315","316")
	   then NAICS4_BEA = "315 ";
	   else if NAICS4 in ("3361","3362","3363")
	   then NAICS4_BEA = "3361";
	   else if NAICS4 in ("3364","3365","3366","3367","3368","3369")
	   then NAICS4_BEA = "3364";
	run;

	/* Rename the 6-digit NAICS code variable for merging with the deflators dataset. */
	proc datasets library=work nolist;
	  modify &dataset;
	 rename NAICS_NEW_6 = NAICS;
	run;

	proc sort data=&dataset; by NAICS YEAR; run;
	proc sort data=nberces0207; by NAICS YEAR; run;

	/* Merge NBER deflators and cost shares with the plant-level CMF data. */

	data &dataset in_cmf_not_in_nber in_nber_not_in_cmf;
	 merge  &dataset (in=incmf) nberces0207 (in=innber); 
	 by NAICS year;
	 if incmf and innber then output &dataset;
	 else if incmf then output in_cmf_not_in_nber;
	 else output in_nber_not_in_cmf;
	run;

	proc sort data=&dataset; by NAICS4_BEA year; run;
	proc sort data=bea_capital; by NAICS4_BEA year; run;

	/* Merge BEA capital data with plant-level CMF data. */
	/* NOTE: for non-matches, we are only concerned about
	   records in the CMF that don't match to a BEA industry. */

	data &dataset in_bea_not_in_cmf in_cmf_not_in_bea;
	 merge bea_capital (in=inbea) &dataset (in=incmf);
	 by NAICS4_BEA year;
	 if inbea and incmf then output &dataset;
	 else if inbea then output in_bea_not_in_cmf;
	 else output in_cmf_not_in_bea;
	run; 

	/*  Look at the plants in the CMF that did't merge to the BEA dataset. */
	proc freq data=IN_CMF_NOT_IN_BEA;
	 tables NAICS4_BEA*year NAICS*year ;
	title "Plants in CMF that did not match to the BEA dataset";
	run;

	/* Convert nominal variables to real;
	   Impute production-worker-equivalent hours; 
	   Convert book values of assets to real market values of capital stock;
	   Calculate TFPR;
	*/
	data &dataset;
	 set &dataset;
	   **dinv = tie - tib;   *dinv was constructed before imputation;

	   gross_output = sum(tvs,dinv);
	   q = gross_output/piship_new;
	   if q > 0 then lq = log(q);

	   keqst = keqshare*tae*(nkceq/gkheq)/piinv_new;  * market value of stock of equipment;
	   kstst = kstshare*tae*(nkcst/gkhst)/piinv_new;  * market value of stock of structures;
	   kst = sum(keqst,kstst);
	   if kst > 0 then lk = log(kst);

	   if wwswratio>0 then th=ph/wwswratio;
	   if th>0 then lth = log(th);

	   if energycmratio>0 and cm>0 then cost_energy = energycmratio*cm;
	   realenergy = cost_energy/pien_new;
	   if realenergy>0 then le = log(realenergy);

	   realcm = cm/pimat_new;
	   matq = realcm - realenergy;
	   if matq>0 then lm = log(matq);
   
	   tfpr = lq - (alpha_K*lk + alpha_L*lth + alpha_E*le + alpha_M*lm);
	run; 


	 proc sort data=&dataset; by year NAICS _IMPUTE_; RUN;

	 proc means data=&dataset qrange NOPRINT;
	  var tfpr;
	 by year NAICS _IMPUTE_;
	 output out=&dataset._tfpr_iqrs (keep = year NAICS _IMPUTE_ tfpr_iqr) qrange=tfpr_iqr ;
	 run;

        /* Now calculate the means of the IQRs across the 100 implicates. */
        
	data &dataset._tfpr_iqrs;
	 set &dataset._tfpr_iqrs;
	  tfpr7525 = exp(tfpr_iqr);
	run;

 	proc means data=&dataset._tfpr_iqrs mean;
          var tfpr_iqr tfpr7525;
          by year NAICS;
          output out=allcmf.imputes_0207_tfpr_iqr_means (keep = year NAICS tfpr_iqr_mean tfpr7525_mean) mean(tfpr_iqr)=tfpr_iqr_mean mean(tfpr7525)=tfpr7525_mean;
  	run;

%mend;



data imputes_all_inds0207;
 set allcmf.imputes_all_inds02
     allcmf.imputes_all_inds07;
run;

%merge_industry_plant(dataset=imputes_all_inds0207);


/**************/
/*  Table XX  */
/**************/


proc sort data = allcmf.imputes_0207_tfpr_iqr_means; by year NAICS; run;

proc means data= allcmf.imputes_0207_tfpr_iqr_means q1 median mean stddev q3;
 var tfpr_iqr_mean ;
by year;
title1 "Table XX: Distribution of NAIC6 industry IQRs of TFPR in CART-completed data";
title2 "Means of IQR estimates from 100 implicates";
run;

proc means data= allcmf.imputes_0207_tfpr_iqr_means q1 median mean stddev q3;
 var tfpr7525_mean ;
by year;
title1 "Table XX: Distribution of NAIC6 industry 75-25 ratios of TFPR in CART-completed data";
title2 "Means of 75-25 ratio estimates from 100 implicates";
run;

